**********************************
** PREPARE DATA FOR ESTIMATION  **
**********************************

*Load the industry-specific dataset. The file name is built from the global
*macro ind, which is not set anywhere in this section.
*NOTE(review): clear all also drops programs held in memory, so the evaluators
*called further down (nl elas, gmm gmm_prod) must be available as ado-files or
*be defined after this point -- confirm.
set more off
clear all
use chileall_$ind.dta

*Trim top and bottom 5% of input price distribution.
*The cutoffs are stored as double: a default (float) variable rounds the
*returned percentiles, so a double-precision price sitting exactly at a cutoff
*could be kept or dropped by rounding error alone.
*Observations with a missing int_input_price are dropped as well, because
*missing compares greater than any number.
centile int_input_price, centile(5, 95)
gen double lower = r(c_1)
gen double upper = r(c_2)
drop if int_input_price < lower | int_input_price > upper

*Build the panel key (firm code times 10000 plus the cpc3 code), give the
*identifiers short names, and declare the firm-product by time panel
generate double firm_prod = 10000 * firm + cpc3
rename firm_id id
rename year time
tset firm_prod time

*p copies l_p_fjt_dom; ad is the one-period lag of ad_pq_it over f1_sale_it
generate double p = l_p_fjt_dom
generate double adrat2 = ad_pq_it / f1_sale_it
generate ad = L.adrat2

*Short names for the series used in estimation
rename l_rr_ft r
rename l_rk k
rename l_rwl l
rename l_ri i
*Alternative input measure not using firm-level prices
*(l_ri has already been renamed to i here, so enabling the next line as written
*would fail; it would have to target i or be moved above the rename)
*replace l_ri = ln(ri_alt)
rename l_y_ft yg

*Rescale: subtract ln(1000) from each of the five series, then apply an
*additional constant shift to l and to i
foreach v in r k l i yg {
    replace `v' = `v' - ln(1000)
}

replace l = l - 3
replace i = i - 5

*Squares and pairwise interactions of l, k, i and p
generate double ll = l*l
generate double kk = k*k
generate double ii = i*i
generate double lk = l*k
generate double li = l*i
generate double ki = k*i
generate double pp = p*p
generate double lp = l*p
generate double kp = k*p
generate double ip = i*p

*Terms in ad
generate double ap = ad*p
generate double aa = ad*ad

*yd is ln(y_fjt_dom) shifted by 13 and divided by 2
generate double yd = (ln(y_fjt_dom) - 13)/2

*s_ft_dom is only defined for single product firms
generate double si = l_s_ft_dom

gen l_p_tot = ln(p_tot)

*Leave-one-out mean of yd within each id-cpc3 cell: the cell total minus the
*observation's own yd, divided by the number of other non-missing values.
*The total and the mean are stored as double so they do not round the
*double-precision yd to float; total() replaces the legacy sum() egen function.
*Cells with a single non-missing yd divide by zero, which yields missing.
bys id cpc3: egen double s_yd = total(yd)
bys id cpc3: egen c_yd = count(yd)
gen double m_yd = (s_yd - yd) / (c_yd - 1)
*The by-group sort above reordered the data, so declare the panel again
tset firm_prod time

*One indicator variable per value of time and per value of cpc3
ta time, gen(d_t_)
ta cpc3, gen(d_cpc_)

*Square of yd and its interaction with p
gen yd2 = yd^2
gen ydp = yd*p

*Trim extreme outliers of the share: blank out si whenever exp(l_s_ft) falls
*outside [0.02, 1]. A missing si_GNR also blanks si, because missing compares
*greater than 1.
generate double si_GNR = exp(l_s_ft)
replace si = . if si_GNR < 0.02 | si_GNR > 1.0

*Define overall flag variable for observations used in estimation:
*1 when small_prod is 1, otherwise 0 when flag_p_dom is 0, otherwise 1.
*The estimation samples further down keep flag_all_p_dom == 0.
generate flag_all_p_dom = cond(small_prod == 1, 1, cond(flag_p_dom == 0, 0, 1))


***************************
** SINGLE-PRODUCT FIRMS  **
***************************

*FIRST STAGE

*Estimation sample shared by every single-product first-stage command
local spf_sample "num_prod == 1 & flag_p == 0 & flag_all_p_dom == 0"

*Instrument for domestic quantity: fitted values from a linear regression of yd
*on the input and price polynomial, the ad terms, the time and product
*dummies, and the first lags of l and k
regress yd l k i ll kk ii lk li ki p pp lp kp ip ad ap d_t_* d_cpc_* L.l L.k if `spf_sample'
predict yd_hat if e(sample)

*Square of the fitted quantity and its interactions
generate yy = yd_hat^2
generate yp = yd_hat*p
generate yl = yd_hat*l
generate yk = yd_hat*k
generate yi = yd_hat*i
generate ya = yd_hat*ad

*Remove collinear regressors on the estimation sample. count ends up one
*larger than the number of retained regressors, which matches the length of
*the coefficient vector of the linear regression below (it includes _cons).
_rmcoll l k i ll kk ii lk li ki p pp lp kp ip ad ap yd_hat ya yp d_t_* d_cpc_* if `spf_sample', forcedrop
local xvar `r(varlist)'
local count: word count `xvar'
display `count'
local count = `count' + 1

*First stage share regression--single-product firms only--starting values
regress si `xvar' if `spf_sample'
matrix test = e(b)

*First stage share regression--single-product firms only.
*The evaluator program behind "nl elas @" is not defined in this section.
#delimit ;
nl elas @ si `xvar' if l != . & k != . & si != . & i != . & p != . & ad != . & `spf_sample',
    nparameters(`count') initial(test) ;
#delimit cr

*Predict shares and epsilon: ego is the residual with its sign flipped and
*ie_f is the exponentiated fitted value
predict ie_f if l != . & k != . & si != . & i != . & p != . & `spf_sample'
predict ego if l != . & k != . & si != . & i != . & p != . & `spf_sample', resid
replace ego = -ego
replace ie_f = exp(ie_f)


*SECOND STAGE

*Generate variables for second stage based on first stage estimates.
*qg is yg net of ego (the sign-flipped first-stage residual); the remaining
*variables are first lags of qg, l, k, i and p and the second lag of i.
*The panel is declared again before the lag operators are used.
gen double qg=yg - ego if yg~=. & i~=. & l~=. & k~=. & ego~=. 
tset firm_prod time
gen double qg_1=L.qg
gen double l_1=L.l
gen double k_1=L.k
gen double i_1=L.i
gen double i_2=L2.i
gen double p_1 = L.p

*Starting values: OLS of qg on l, k and i over the single-product estimation
*sample. test3 is the 1x3 vector of the l, k and i coefficients; each element
*is re-assigned by column name so the order is l, k, i regardless of position.
reg qg l k i if num_prod == 1 & flag_p == 0 & flag_all_p_dom == 0
matrix test2 = e(b)
matrix test3 = test2[1,1..3]
matrix test3[1,1] = test2[1,"l"]
matrix test3[1,2] = test2[1,"k"]
matrix test3[1,3] = test2[1,"i"]
matrix drop test2

*Restrict the GMM sample. qg_1 was built from qg before qg is masked here, so
*it is masked separately using the lagged values of the same three indicators.
*In the second line "l." is the lag operator applied to num_prod, flag_p and
*flag_all_p_dom, not the variable l.
replace qg = . if num_prod ~= 1 | flag_p ~= 0 | flag_all_p_dom ~= 0 
replace qg_1 = . if l.num_prod ~= 1 | l.flag_p ~= 0 | l.flag_all_p_dom ~= 0

*Second Stage GMM.
*gmm_prod is a moment-evaluator program that is not defined in this section.
*The call declares 4 moment equations and 3 parameters (al ak ai), starts from
*test3 with an identity initial weight matrix, uses the gn technique and allows
*up to 150 iterations.
*NOTE(review): rhs() is presumably passed through to gmm_prod, and igm is
*presumably the abbreviated igmm option -- confirm both against gmm_prod and
*the gmm documentation.
gmm gmm_prod if qg!=. & l!=. & k!=. & i!=. & qg_1!=. & l_1!=. & k_1!=. & i_1!=. & p_1!= . & i_2 ~= ., technique(gn) igm nequations(4) parameters(al ak ai) from(test3) winitial(identity) rhs(qg l k i qg_1 l_1 k_1 i_1 cpc3 p_1 i_2) conv_maxiter(150)


*SAVE AND COMPILE ESTIMATES
*Copy the GMM coefficient vector into the matrix beta and broadcast its three
*entries, in the order al, ak, ai, to every observation as double variables
matrix beta = e(b)
generate double al = beta[1,1]
generate double ak = beta[1,2]
generate double ai = beta[1,3]

*Generate productivity and output elasticities.
*temp is the fitted input contribution al*l + ak*k + ai*i; log productivity is
*qg net of it, and prod_spf is its exponential.
generate double temp = al*l + ak*k + ai*i if l != . & k != . & i != .
generate double logprod_spf = qg - temp
generate double prod_spf = exp(logprod_spf)

*lelas, kelas and ielas equal al, ak and ai, and are defined only where both
*flag_p and flag_all_p_dom are 0
foreach f in l k i {
    generate double `f'elas = a`f' if flag_p == 0 & flag_all_p_dom == 0
}

*Domestic markups: exp of ln(ielas) minus ln(ie_f), computed in logs
generate double ln_temp_m = -(ln(ie_f) - ln(ielas))
generate double markup_dom_spf = exp(ln_temp_m)

*All Foreign Markups
*For each destination code, the markup is the single-product domestic markup
*scaled by the ratio of that destination's price variable to p_ft_dom.
#delimit ;
local countries "AFG AGO AIA ALB ANT ARG ARM ATG AUS AUT AZE BDI BEL BEN BGD BGR BHR 
                   BHS BIH BLR BLZ BMU BOL BRA BRB CAF CAN CHE CHN CIV CMR COD COG 
				   COL CPV CRI CUB CYP CZE DEU DMA DNK DOM DZA ECU EGY ESP EST ETH
				   FIN FJI FLK FRA FSM GAB GBR GEO GHA GIN GMB GNQ GRC GRD GRL GTM 
				   GUY HKG HND HRV HTI HUN IDN IND IRL IRN IRQ ISL ISR ITA JAM JOR
				   JPN KAZ KEN KGZ KHM KIR KNA KOR KWT LAO LBN LBR LBY LCA LKA LTU 
				   LUX LVA MAC MAR MCO MDA MDG MDV MEX MHL MKD MLI MLT MMR MNG MOZ 
				   MRT MUS MWI MYS NAM NER NGA NIC NLD NOR NPL NZL OMN PAK PAN PER 
				   PHL PNG POL PRK PRT PRY QAT ROU RUS SAU SDN SEN SGP SLB SLE SLV 
				   SMR SPM SRB STP SUR SVK SVN SWE SWZ SYC SYR TGO THA TJK TON TTO 
				   TUN TUR TUV TWN TZA UAE UGA UKR URY USA UZB VCT VEN VNM VUT WSM 
				   YEM ZAF ZMB ZWE
				   ABW COK CYM GUM MSR MTQ NCL PRI PYF TCA VGB VIR" ;
#delimit cr
*Destinations without a numeric price variable in the data are skipped.
*Only that expected case is tolerated: the existence check replaces a blanket
*capture around gen, which also hid unrelated failures such as an already
*existing markup variable.
foreach coun of local countries  {
    capture confirm numeric variable p_ft_`coun' p_ft_dom
    if _rc == 0 {
        gen double markup_`coun'_spf = markup_dom_spf * (p_ft_`coun' / p_ft_dom)
    }
}


**************************
** MULTI-PRODUCT FIRMS  **
**************************

*PREPARE DATA

*Keep the single-product first-stage results under an "_spf" suffix so the
*names si, ego and ie_f can be reused below
foreach v in si ego ie_f {
    rename `v' `v'_spf
}

*Switch the output and share variables to l_y_fjt and l_s_fjt_dom.
*NOTE(review): the outlier trimming applied to the earlier si is not repeated
*for this one -- confirm that is intended.
replace yg = l_y_fjt
replace yg = yg - ln(1000)
generate double si = l_s_fjt_dom

*Discard the terms built from the earlier inputs and fitted quantity
drop ll kk ii lk li ki lp kp ip yd_hat yy yp yl yk yi ya

*Compute product-specific inputs: keep each earlier input as *_tot and
*add l_rho_fjt to it
foreach v in l k i {
    rename `v' `v'_tot
    generate double `v' = `v'_tot + l_rho_fjt
}

*Rebuild squares and interactions from the product-specific inputs
*(pp is unchanged and therefore kept)
generate double ll = l*l
generate double kk = k*k
generate double ii = i*i
generate double lk = l*k
generate double li = l*i
generate double ki = k*i
generate double lp = l*p
generate double kp = k*p
generate double ip = i*p


*FIRST STAGE

*Estimation sample shared by every multi-product first-stage command
local mpf_sample "num_prod != 1 & flag_p == 0 & flag_all_p_dom == 0"

*Instrument for domestic quantity: fitted values from a linear regression of yd
*on the input and price polynomial, the ad terms, the time and product
*dummies, and the first lags of l and k
regress yd l k i ll kk ii lk li ki p pp lp kp ip ad ap d_t_* d_cpc_* L.l L.k if `mpf_sample'
predict yd_hat if e(sample)

*Square of the fitted quantity and its interactions
generate yy = yd_hat^2
generate yp = yd_hat*p
generate yl = yd_hat*l
generate yk = yd_hat*k
generate yi = yd_hat*i
generate ya = yd_hat*ad

*Remove collinear regressors on the estimation sample. count ends up one
*larger than the number of retained regressors, which matches the length of
*the coefficient vector of the linear regression below (it includes _cons).
_rmcoll l k i ll kk ii lk li ki p pp lp kp ip ad ap yd_hat ya yp d_t_* d_cpc_* if `mpf_sample', forcedrop
local xvar `r(varlist)'
local count: word count `xvar'
display `count'
local count = `count' + 1

*First stage share regression--multi-product firms--starting values
regress si `xvar' if `mpf_sample'
matrix test = e(b)

*First stage share regression--multi-product firms.
*The evaluator program behind "nl elas @" is not defined in this section.
#delimit ;
nl elas @ si `xvar' if l != . & k != . & si != . & i != . & p != . & ad != . & `mpf_sample',
    nparameters(`count') initial(test) ;
#delimit cr

*Predict shares and epsilon: ego is the residual with its sign flipped and
*ie_f is the exponentiated fitted value
predict ie_f if l != . & k != . & si != . & i != . & p != . & `mpf_sample'
predict ego if l != . & k != . & si != . & i != . & p != . & `mpf_sample', resid
replace ego = -ego
replace ie_f = exp(ie_f)



*SAVE AND COMPILE ESTIMATES

*Clear the scratch variables left over from the single-product section
drop temp ln_temp_m

*Generate productivity estimates. No second GMM step is run in this section:
*al, ak and ai are the coefficients estimated earlier, applied here to the
*product-specific inputs.
generate double temp = al*l + ak*k + ai*i if l != . & k != . & i != .
generate double qg_mpf = yg - ego if yg != . & i != . & l != . & k != . & ego != .
generate double logprod = qg_mpf - temp
generate double prod = exp(logprod)

*Domestic markups: exp of ln(ielas) minus ln(ie_f), computed in logs
generate double ln_temp_m = -(ln(ie_f) - ln(ielas))
generate double markup_dom = exp(ln_temp_m)

*All Foreign Markups
*For each destination code, the markup is the domestic markup scaled by the
*ratio of that destination's price variable to p_fjt_dom.
#delimit ;
local countries "AFG AGO AIA ALB ANT ARG ARM ATG AUS AUT AZE BDI BEL BEN BGD BGR BHR 
                   BHS BIH BLR BLZ BMU BOL BRA BRB CAF CAN CHE CHN CIV CMR COD COG 
				   COL CPV CRI CUB CYP CZE DEU DMA DNK DOM DZA ECU EGY ESP EST ETH
				   FIN FJI FLK FRA FSM GAB GBR GEO GHA GIN GMB GNQ GRC GRD GRL GTM 
				   GUY HKG HND HRV HTI HUN IDN IND IRL IRN IRQ ISL ISR ITA JAM JOR
				   JPN KAZ KEN KGZ KHM KIR KNA KOR KWT LAO LBN LBR LBY LCA LKA LTU 
				   LUX LVA MAC MAR MCO MDA MDG MDV MEX MHL MKD MLI MLT MMR MNG MOZ 
				   MRT MUS MWI MYS NAM NER NGA NIC NLD NOR NPL NZL OMN PAK PAN PER 
				   PHL PNG POL PRK PRT PRY QAT ROU RUS SAU SDN SEN SGP SLB SLE SLV 
				   SMR SPM SRB STP SUR SVK SVN SWE SWZ SYC SYR TGO THA TJK TON TTO 
				   TUN TUR TUV TWN TZA UAE UGA UKR URY USA UZB VCT VEN VNM VUT WSM 
				   YEM ZAF ZMB ZWE
				   ABW COK CYM GUM MSR MTQ NCL PRI PYF TCA VGB VIR" ;
#delimit cr
*Destinations without a numeric price variable in the data are skipped.
*Only that expected case is tolerated: the existence check replaces a blanket
*capture around gen, which also hid unrelated failures such as an already
*existing markup variable.
foreach coun of local countries  {
    capture confirm numeric variable p_fjt_`coun' p_fjt_dom
    if _rc == 0 {
        gen double markup_`coun' = markup_dom * (p_fjt_`coun' / p_fjt_dom)
    }
}


*Save results: write the dataset in memory, with every variable created above,
*to a file named markup_ followed by the value of the global macro ind
*(not set in this section), overwriting any existing file of that name
save markup_$ind, replace

